The six-point example uses six data points. Yes, it is small and boring.
# Build the six-observation example data set in one step.
sixpts <- data.frame(
  x = c(3, 4, 4, 5, 6, 6),
  y = c(3, 3, 4, 6, 5, 7)
)
# Standalone copies of the two columns, for any code that uses `x` / `y`
# directly rather than through the data frame.
x <- sixpts$x
y <- sixpts$y
sixpts
## x y
## 1 3 3
## 2 4 3
## 3 4 4
## 4 5 6
## 5 6 5
## 6 6 7
The points can be plotted.
# Scatterplot of the six points on a fixed 1:1 aspect ratio, with a
# hand-drawn grid of light grey segments spaced 0.25 units apart.
p_load(ggplot2)
ggplot(sixpts, aes(x = x, y = y)) +
  geom_point() +
  theme_bw() + # Add theme for cleaner look
  # Horizontal grid lines.
  annotate(
    geom = "segment",
    y = seq(-1, 8, 0.25),
    yend = seq(-1, 8, 0.25),
    x = -0.25,
    xend = 7.25,
    col = "lightgrey"
  ) +
  # Vertical grid lines.
  annotate(
    geom = "segment",
    x = seq(-0.25, 7.25, 0.25),
    xend = seq(-0.25, 7.25, 0.25),
    y = -1,
    yend = 8,
    col = "lightgrey"
  ) +
  # A plot holds a single coordinate system: a coord_cartesian() call followed
  # by coord_fixed() is replaced and its limits are dropped. Passing the limits
  # to coord_fixed() keeps both the 1:1 aspect ratio and the viewing window.
  coord_fixed(xlim = c(0, 7), ylim = c(-1, 8))
The “best fit” linear regression line can be added.
# Scatterplot of the six points with the least-squares line of y on x,
# drawn on the same fixed-aspect canvas and 0.25-unit grid.
ggplot(sixpts, aes(x = x, y = y)) +
  geom_smooth(
    method = "lm", se = FALSE,
    color = "darkgrey"
  ) + # Plot regression slope
  geom_point() +
  theme_bw() +
  # Horizontal grid lines.
  annotate(
    geom = "segment",
    y = seq(-1, 8, 0.25), yend = seq(-1, 8, 0.25),
    x = -0.25, xend = 7.25,
    col = "lightgrey"
  ) +
  # Vertical grid lines.
  annotate(
    geom = "segment",
    x = seq(-0.25, 7.25, 0.25), xend = seq(-0.25, 7.25, 0.25),
    y = -1, yend = 8,
    col = "lightgrey"
  ) +
  # Limits go straight into coord_fixed(): a separate coord_cartesian() call
  # would be replaced by coord_fixed() and its limits discarded.
  coord_fixed(xlim = c(0, 7), ylim = c(-1, 8))
## `geom_smooth()` using formula = 'y ~ x'
The vertical errors (residuals) can be observed.
# Fit the simple linear regression of y on x and store the fitted values
# next to the data so they can be drawn.
sixpts.lm <- lm(y ~ x, data = sixpts)
sixpts$predicted <- predict(sixpts.lm)

# Regression line, observed points (filled), fitted points (open circles),
# and a faded vertical segment from each observation to its fitted value
# (the residual).
ggplot(sixpts, aes(x = x, y = y)) +
  geom_smooth(
    method = "lm", se = FALSE,
    color = "darkgrey"
  ) + # Plot regression slope
  geom_segment(
    aes(xend = x, yend = predicted),
    alpha = .2
  ) + # alpha to fade lines
  geom_point() +
  geom_point(aes(y = predicted), shape = 1) +
  theme_bw() + # Add theme for cleaner look
  # Horizontal grid lines.
  annotate(
    geom = "segment",
    y = seq(-1, 8, 0.25), yend = seq(-1, 8, 0.25),
    x = -0.25, xend = 7.25,
    col = "lightgrey"
  ) +
  # Vertical grid lines.
  annotate(
    geom = "segment",
    x = seq(-0.25, 7.25, 0.25), xend = seq(-0.25, 7.25, 0.25),
    y = -1, yend = 8,
    col = "lightgrey"
  ) +
  # Limits go straight into coord_fixed(): a separate coord_cartesian() call
  # would be replaced by coord_fixed() and its limits discarded.
  coord_fixed(xlim = c(0, 7), ylim = c(-1, 8))
## `geom_smooth()` using formula = 'y ~ x'
We now show that the regression line goes through \((\bar{x},\ \bar{y}) = (4.6667,\ 4.6667)\).
# Residual plot with dotted reference lines at the sample means of x and y.
# Expects `sixpts` to already carry a `predicted` column of fitted values.
ggplot(sixpts, aes(x = x, y = y)) +
  geom_smooth(
    method = "lm", se = FALSE,
    color = "darkgrey"
  ) + # Plot regression slope
  geom_segment(
    aes(xend = x, yend = predicted),
    alpha = .2
  ) + # alpha to fade lines
  geom_point() +
  geom_point(aes(y = predicted), shape = 1) +
  theme_bw() + # Add theme for cleaner look
  # Means are taken from the plotted data frame rather than from free-floating
  # `x` / `y` vectors, so the reference lines always match the data shown.
  geom_hline(yintercept = mean(sixpts$y), lty = 3, col = 1) +
  geom_vline(xintercept = mean(sixpts$x), lty = 3, col = 1) +
  # Horizontal grid lines.
  annotate(
    geom = "segment",
    y = seq(-1, 8, 0.25), yend = seq(-1, 8, 0.25),
    x = -0.25, xend = 7.25,
    col = "lightgrey"
  ) +
  # Vertical grid lines.
  annotate(
    geom = "segment",
    x = seq(-0.25, 7.25, 0.25), xend = seq(-0.25, 7.25, 0.25),
    y = -1, yend = 8,
    col = "lightgrey"
  ) +
  # Limits go straight into coord_fixed(): a separate coord_cartesian() call
  # would be replaced by coord_fixed() and its limits discarded.
  coord_fixed(xlim = c(0, 7), ylim = c(-1, 8))
## `geom_smooth()` using formula = 'y ~ x'
The regression equation and associated measures of its quality are easy to access.
# Fit y on x by ordinary least squares and print the coefficient table,
# residual standard error, R-squared and overall F test.
sixpts.lm <- lm(y ~ x, data = sixpts)
summary(sixpts.lm)
##
## Call:
## lm(formula = y ~ x, data = sixpts)
##
## Residuals:
## 1 2 3 4 5 6
## 0.22727 -0.90909 0.09091 0.95455 -1.18182 0.81818
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.6364 1.7405 -0.366 0.7332
## x 1.1364 0.3629 3.131 0.0351 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9828 on 4 degrees of freedom
## Multiple R-squared: 0.7102, Adjusted R-squared: 0.6378
## F-statistic: 9.804 on 1 and 4 DF, p-value: 0.03515
# Analysis-of-variance table for the fitted regression model.
anova(object = sixpts.lm)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 9.4697 9.4697 9.8039 0.03515 *
## Residuals 4 3.8636 0.9659
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1